In [ ]:
import rasterio
from rasterio.mask import mask
import geopandas as gpd
import numpy as np
from shapely.geometry import mapping
import geopandas as gpd
In [ ]:
import os # we need os to do some basic file operations

sentinal_fp = "Imagery/"
# find every file in the sentinal_fp directory
sentinal_band_paths = [os.path.join(sentinal_fp, f) for f in os.listdir(sentinal_fp) if os.path.isfile(os.path.join(sentinal_fp, f))]
sentinal_band_paths.sort()
sentinal_band_paths
Out[ ]:
['Imagery/Image_bbox01.tif',
 'Imagery/Image_bbox01.tif.aux.xml',
 'Imagery/Image_bbox01.tif.ovr',
 'Imagery/satellite_prompts.tif',
 'Imagery/satellite_prompts2.tif']
In [ ]:
file_path = 'C:/Users/leoni/Documents/Uni/UGS/Project/Classification_data/data/osm_polygon.gpkg'

# Load the dataset
data = gpd.read_file(file_path)
data
Out[ ]:
osmid class geometry
0 24537862 recreation_ground POLYGON ((10.89427 48.30580, 10.89428 48.30618...
1 24795237 recreation_ground POLYGON ((10.90226 48.28125, 10.90227 48.28118...
2 26552754 recreation_ground POLYGON ((10.87407 48.36330, 10.87403 48.36304...
3 34244079 recreation_ground POLYGON ((10.92775 48.35498, 10.92767 48.35493...
4 34995210 recreation_ground POLYGON ((10.87226 48.35491, 10.87230 48.35486...
... ... ... ...
6246 1227871965 building POLYGON ((10.91509 48.35313, 10.91507 48.35313...
6247 645024044 building POLYGON ((10.86227 48.31631, 10.86227 48.31625...
6248 1175155694 building POLYGON ((10.85076 48.39985, 10.85086 48.39985...
6249 111755134 building POLYGON ((10.90005 48.39400, 10.90010 48.39395...
6250 1208722589 building POLYGON ((10.92128 48.38857, 10.92121 48.38855...

6251 rows × 3 columns

In [ ]:
data.plot()
Out[ ]:
<Axes: >
No description has been provided for this image
In [ ]:
# create a products directory within the data dir which won't be uploaded to Github
img_dir = 'Imagery/'

# check to see if the dir it exists, if not, create it
if not os.path.exists(img_dir):
    os.makedirs(img_dir)

# filepath for image we're writing out
img_fp = img_dir + 'Image_bbox01.tif'

# Read metadata of first file and assume all other bands are the same
with rasterio.open(sentinal_band_paths[0]) as src0:
    meta = src0.meta

# Update metadata to reflect the number of layers
meta.update(count = len(sentinal_band_paths))

# # Read each layer and write it to stack
# with rasterio.open(img_fp, 'w', **meta) as dst:
#     for id, layer in enumerate(sentinal_band_paths, start=1):
#         with rasterio.open(layer) as src1:
#             dst.write_band(id, src1.read(1))
In [ ]:
full_dataset = rasterio.open(img_fp)
img_rows, img_cols = full_dataset.shape
img_bands = full_dataset.count
print(full_dataset.shape) # dimensions
print(full_dataset.count) # bands
(11224, 7457)
3
In [ ]:
full_dataset
Out[ ]:
<open DatasetReader name='Imagery/Image_bbox01.tif' mode='r'>
In [ ]:
import numpy as np
from rasterio.plot import show
import matplotlib.pyplot as plt

img_fp = 'Imagery/Image_bbox01.tif'

# Open the full extent of the raster dataset
with rasterio.open(img_fp) as dataset:
    # Read the specified bands (3, 2, 1 for RGB)
    img_data = dataset.read([3, 2, 1])
# Assuming you've already read the img_data
# Scale the data to 0-255 range, assuming it's 16-bit
img_data = np.clip(img_data / 4096 * 255, 0, 255).astype(np.uint8)

# Mask out no data values if necessary
no_data_value = -9999  # Replace with your actual no data value
img_data[img_data == no_data_value] = 0  # Set no data values to 0 for display

# Now display the image
fig, ax = plt.subplots(figsize=(10, 7))
show(img_data, ax=ax, transform=dataset.transform)

plt.show()
No description has been provided for this image
In [ ]:
full_dataset = rasterio.open(img_fp)
raster_crs = full_dataset.crs
In [ ]:
from rasterio.warp import calculate_default_transform, reproject, Resampling

# Define the source and destination coordinate reference systems
src_crs = 'EPSG:32632'
dst_crs = 'EPSG:4326'

# Open the original dataset
with rasterio.open('Imagery/Image_bbox01.tif') as src:
    # Calculate the transform and dimensions for the new dataset
    transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds)
    
    # Define the metadata for the new dataset
    kwargs = src.meta.copy()
    kwargs.update({
        'crs': dst_crs,
        'transform': transform,
        'width': width,
        'height': height
    })

    # Create the new dataset and reproject the original data into it
    with rasterio.open('Imagery/Image_bbox012.tif', 'w', **kwargs) as dst:
        for i in range(1, src.count + 1):  # Loop through all bands
            reproject(
                source=rasterio.band(src, i),
                destination=rasterio.band(dst, i),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest)
In [ ]:
shapefile = gpd.read_file('C:/Users/leoni/Documents/Uni/UGS/Project/Classification_data/data/osm_polygon.gpkg')
shapefile.crs
Out[ ]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
shapefile = shapefile.to_crs(epsg=32632)
shapefile.crs
Out[ ]:
<Projected CRS: EPSG:32632>
Name: WGS 84 / UTM zone 32N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 6°E and 12°E, northern hemisphere between equator and 84°N, onshore and offshore. Algeria. Austria. Cameroon. Denmark. Equatorial Guinea. France. Gabon. Germany. Italy. Libya. Liechtenstein. Monaco. Netherlands. Niger. Nigeria. Norway. Sao Tome and Principe. Svalbard. Sweden. Switzerland. Tunisia. Vatican City State.
- bounds: (6.0, 0.0, 12.0, 84.0)
Coordinate Operation:
- name: UTM zone 32N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
shapefile
Out[ ]:
osmid class geometry
0 24537862 recreation_ground POLYGON ((640464.148 5352023.221, 640463.987 5...
1 24795237 recreation_ground POLYGON ((641124.381 5349310.187, 641125.245 5...
2 26552754 recreation_ground POLYGON ((638809.721 5358378.274, 638808.031 5...
3 34244079 recreation_ground POLYGON ((642809.012 5357551.256, 642803.489 5...
4 34995210 recreation_ground POLYGON ((638698.680 5357441.566, 638702.034 5...
... ... ... ...
6246 1227871965 building POLYGON ((641876.757 5357322.760, 641874.943 5...
6247 645024044 building POLYGON ((638062.869 5353133.172, 638062.913 5...
6248 1175155694 building POLYGON ((636985.360 5362398.072, 636992.579 5...
6249 111755134 building POLYGON ((640649.513 5361837.100, 640653.686 5...
6250 1208722589 building POLYGON ((642236.480 5361272.707, 642230.813 5...

6251 rows × 3 columns

In [ ]:
shapefile = shapefile.to_crs({'init': 'epsg:4326'})
shapefile.crs
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
Out[ ]:
<Geographic 2D CRS: +init=epsg:4326 +type=crs>
Name: WGS 84
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
len(shapefile)
Out[ ]:
6251
In [ ]:
# this generates a list of shapely geometries
geoms = shapefile.geometry.values 

# let's grab a single shapely geometry to check
geometry = geoms[0] 
print(type(geometry))
print(geometry)

# transform to GeoJSON format
from shapely.geometry import mapping
feature = [mapping(geometry)] # can also do this using polygon.__geo_interface__
print(type(feature))
print(feature)
<class 'shapely.geometry.polygon.Polygon'>
POLYGON ((10.8942717 48.3057961, 10.8942839 48.3061844, 10.897539 48.30582540000001, 10.8978662 48.3051395, 10.897797799999998 48.30473709999998, 10.8976798 48.304690799999996, 10.8939032 48.305060100000006, 10.893856299999998 48.30511539999999, 10.893844199999998 48.3052019, 10.893858800000002 48.3056951, 10.893969399999998 48.30569340000001, 10.8942717 48.3057961))
<class 'list'>
[{'type': 'Polygon', 'coordinates': (((10.8942717, 48.3057961), (10.8942839, 48.3061844), (10.897539, 48.30582540000001), (10.8978662, 48.3051395), (10.897797799999998, 48.30473709999998), (10.8976798, 48.304690799999996), (10.8939032, 48.305060100000006), (10.893856299999998, 48.30511539999999), (10.893844199999998, 48.3052019), (10.893858800000002, 48.3056951), (10.893969399999998, 48.30569340000001), (10.8942717, 48.3057961)),)}]
In [ ]:
full_dataset.close()
In [ ]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box


# Load the shapefile
#shapefile = gpd.read_file(shapefile_path)

# Check if the CRS matches, if not, reproject
if shapefile.crs != raster_crs:
    shapefile = shapefile.to_crs(raster_crs)

# Check that geometries are valid
shapefile['valid'] = shapefile.is_valid
shapefile = shapefile[shapefile['valid']]


with rasterio.open('Imagery/Image_bbox012.tif') as src:
    raster_bounds = src.bounds
    raster_bbox = box(*raster_bounds)  # Create a bounding box from the raster bounds

    # Only proceed if the raster and shapefile overlap
    if not shapefile.unary_union.intersects(raster_bbox):
        raise ValueError("Shapefile and raster do not overlap")
    else:
        print("The shapefile and raster overlap.")

    # Extract the raster values within the polygon
    for geom in shapefile.geometry:
        feature = [mapping(geom)]
        out_image, out_transform = mask(src, feature, crop=True)
        # process out_image
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[24], line 26
     24 # Only proceed if the raster and shapefile overlap
     25 if not shapefile.unary_union.intersects(raster_bbox):
---> 26     raise ValueError("Shapefile and raster do not overlap")
     27 else:
     28     print("The shapefile and raster overlap.")

ValueError: Shapefile and raster do not overlap

Adjust Landcover Data to match the scene¶

In [ ]:
# Reproject the shapefile to match the raster's CRS
shapefile = shapefile.to_crs(epsg=32632)
In [ ]:
with rasterio.open('Imagery/Image_bbox012.tif') as src:
    print(src.bounds)
print(shapefile.total_bounds)
BoundingBox(left=10.929471, bottom=48.357460972573826, right=10.94947202742617, top=48.377461999999994)
[ 634392.46778077 5347771.43359931  645371.9912777  5367521.43948786]
In [ ]:
shapefile = shapefile.to_crs("EPSG:4326")
print(shapefile.total_bounds)
shapefile.crs
[10.9296678 48.3576758 10.9492843 48.3773375]
Out[ ]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot

fig, ax = plt.subplots(figsize=(10, 10))
with rasterio.open('Imagery/Image_bbox012.tif') as src:
    rasterio.plot.show(src, ax=ax)
shapefile.plot(ax=ax, facecolor='none', edgecolor='red')
plt.show()
No description has been provided for this image
In [ ]:
from shapely.geometry import box
from shapely.affinity import scale

shapefile_path = 'C:/Users/leoni/Documents/Uni/UGS/Project/Classification_data/data/osm_polygon.gpkg'

# Open the raster file and get its bounds
with rasterio.open('Imagery/Image_bbox012.tif') as src:
    bounds = src.bounds

# Create a slightly smaller bounding box geometry from the raster bounds
# Reduce each side of the bounding box by a small fraction, e.g., 0.99 to reduce by 1%
smaller_bbox = scale(box(bounds.left, bounds.bottom, bounds.right, bounds.top), xfact=0.99, yfact=0.99, origin='center')

# Create a GeoDataFrame from the smaller bounding box
smaller_bbox_gdf = gpd.GeoDataFrame({'geometry': [smaller_bbox]}, crs=src.crs)

# Load the shapefile
shapefile = gpd.read_file(shapefile_path)

# Make sure the shapefile is in the same CRS as the raster
shapefile = shapefile.to_crs(src.crs)

# Perform a spatial join to find features completely within the smaller bounding box
within_shapefile = gpd.sjoin(shapefile, smaller_bbox_gdf, op='within')

# Save the clipped shapefile to a new file
within_shapefile.to_file('within_shapefile_smaller_bbox.gpkg', driver='GPKG')
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\IPython\core\interactiveshell.py:3466: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
shapefile = within_shapefile
In [ ]:
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot

fig, ax = plt.subplots(figsize=(10, 10))
with rasterio.open('Imagery/Image_bbox012.tif') as src:
    rasterio.plot.show(src, ax=ax)
shapefile.plot(ax=ax, facecolor='none', edgecolor='red')
plt.show()
No description has been provided for this image
In [ ]:
shapefile = shapefile.to_crs("EPSG:4326")
shapefile.crs
Out[ ]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box

with rasterio.open('Imagery/Image_bbox012.tif') as src:
    raster_bounds = src.bounds
    raster_bbox = box(*raster_bounds)  # Create a bounding box from the raster bounds

    # Only proceed if the raster and shapefile overlap
    if not shapefile.unary_union.intersects(raster_bbox):
        raise ValueError("Shapefile and raster do not overlap")
    else:
        print("The shapefile and raster overlap.")

    # Extract the raster values within the polygon
    for geom in shapefile.geometry:
        feature = [mapping(geom)]
        out_image, out_transform = mask(src, feature, crop=True)
The shapefile and raster overlap.
In [ ]:
import numpy as np

# Initialize a list to hold the mean values for each band
band_means = []

# Extract the raster values within the polygon
for geom in shapefile.geometry:
    feature = [mapping(geom)]
    
    with rasterio.open('Imagery/Image_bbox012.tif') as src:
        out_image, out_transform = mask(src, feature, crop=True)
        
        # Calculate mean of each band, excluding no-data values
        means = np.ma.array(out_image, mask=out_image == src.nodata).mean(axis=(1, 2))
        band_means.append(means.filled(src.nodata))
In [ ]:
import numpy as np
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
import geopandas as gpd
from rasterio.features import geometry_mask

img_fp = 'Imagery/Image_bbox012.tif'

X = np.array([], dtype=np.float32).reshape(0, 3)  # Replace '8' with the actual number of bands if different
y = []  # Initialize y as an empty list

incomplete_features = []

with rasterio.open(img_fp) as src:
    raster_crs = src.crs
    nodata = src.nodatavals[0]  # Assuming all bands have the same nodata value
    band_count = src.count

    # Iterate over the geometries in the shapefile
    for index, row in shapefile.iterrows():
        geom = row['geometry']
        # Create a mask for the current geometry
        geom_mask = geometry_mask([geom], transform=src.transform, invert=True, out_shape=(src.height, src.width))

        # Check if the geometry intersects with the raster
        if not geom_mask.all():
            out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata)
            out_image_masked = np.ma.masked_equal(out_image, nodata)
            valid_data = out_image_masked.compressed()

            # Ensure we have a complete set of pixels for all bands
            if valid_data.size % band_count == 0:
                out_image_reshaped = valid_data.reshape(-1, band_count)

                # Remove rows that contain nodata values
                valid_rows = np.all(out_image_reshaped != nodata, axis=1)
                out_image_reshaped = out_image_reshaped[valid_rows]

                # Only proceed if we have valid data left after removing nodata rows
                if out_image_reshaped.size > 0:
                    X = np.vstack((X, out_image_reshaped))
                    y.extend([row['class']] * out_image_reshaped.shape[0])
            else:
                # Collect indices of incomplete features to handle later
                incomplete_features.append(index)
                print(f"Feature {index} does not have a complete set of pixels.")

# Handle incomplete features as needed
# For example, you might want to drop them from the shapefile
shapefile = shapefile.drop(incomplete_features)
In [ ]:
# Initialize empty arrays for the pixel values and labels
X = np.array([], dtype=np.float32).reshape(0, 3)  # Replace '8' with the number of bands
y = []

with rasterio.open(img_fp) as src:
    # Ensure the shapefile is in the same CRS as the raster
    shapefile = shapefile.to_crs(src.crs)
    nodata = src.nodatavals[0]  # Assuming all bands have the same nodata value

    # Loop through each feature in the shapefile
    for index, row in shapefile.iterrows():
        geom = row.geometry
        # Mask the raster with the geometry
        out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata, filled=False)

        # Check if there is any valid data
        if np.any(out_image.mask == False):
            # Reshape and append to X and y
            valid_pixels = out_image.data[~out_image.mask].reshape(-1, src.count)
            X = np.vstack((X, valid_pixels))
            y.extend([row['class']] * valid_pixels.shape[0])
In [ ]:
out_image
Out[ ]:
masked_array(
  data=[[[--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         ...,
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --]],

        [[--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         ...,
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --]],

        [[--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         ...,
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --],
         [--, --, --, ..., --, --, --]]],
  mask=[[[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         ...,
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]],

        [[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         ...,
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]],

        [[ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         ...,
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True],
         [ True,  True,  True, ...,  True,  True,  True]]],
  fill_value=999999,
  dtype=uint8)
In [ ]:
# Assuming 'shapefile' is a GeoDataFrame that has been properly read and is in the correct CRS
geoms = shapefile.geometry.values  # This will give us a numpy array of geometry objects

X = np.array([], dtype=np.float32).reshape(0, band_count)  # Adjust dtype and band_count as needed
y = []

with rasterio.open(img_fp) as src:
    for geom in geoms:  # Loop through each geometry in the array
        feature = [mapping(geom)]  # Convert to format expected by rasterio.mask.mask

        out_image, out_transform = mask(src, feature, crop=True)
        
        # Check for pixels that are not nodata (neither 0 nor 255 in all bands)
        if out_image.any():  # If there's any non-nodata pixel
            # Filter out the nodata pixels and reshape
            out_image_reshaped = out_image[:, (out_image[0] != nodata) & (out_image[0] != 0) & (out_image[0] != 255)].reshape(-1, band_count)
            
            # Now, we need to get the corresponding class for each geometry
            class_label = shapefile.loc[shapefile.geometry == geom, 'class'].values[0]
            
            # Extend the X and y arrays
            X = np.vstack((X, out_image_reshaped))  # Stack the pixels
            y.extend([class_label] * out_image_reshaped.shape[0])  # Extend the labels
In [ ]:
# What are our classification labels?
labels = np.unique(shapefile["class"])
print('The training data include {n} classes: {classes}\n'.format(n=labels.size, 
                                                                classes=labels))

# We will need a "X" matrix containing our features, and a "y" array containing our labels
print('Our X matrix is sized: {sz}'.format(sz=X.shape))
print('Our y array is sized: {sz}'.format(sz=np.array(y).shape))
The training data include 6 classes: ['building' 'cemetery' 'funeral_hall' 'grass' 'playground'
 'recreation_ground']

Our X matrix is sized: (4677081, 3)
Our y array is sized: (4677081,)
Our y array is sized: (4677081,)
In [ ]:
fig, ax = plt.subplots(figsize=[13, 6])

# Numbers 1-8 for bands
band_count = np.arange(1, 4)

# Convert y to numpy array for indexing
y_array = np.array(y)

# Iterate over unique classes
classes = np.unique(y_array)
for class_type in classes:
    # Calculate mean reflectance value for each band for the current class
    band_intensity = np.mean(X[y_array == class_type, :], axis=0)

    # Plot the mean reflectance values on the single plot
    ax.plot(band_count, band_intensity, label=class_type)

# Add axis labels
ax.set_xlabel('Band #')
ax.set_ylabel('Reflectance Value')

# Add a title
ax.set_title('Band Intensities Full Overview')

# Add legend outside of the plot
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))

# Adjust layout to accommodate the legend
plt.tight_layout()

# Display the plot
plt.show()
No description has been provided for this image

'cemetery'
'funeral_hall'
'grass'
'memorial'
'park'
'playground'
'recreation_ground'

In [ ]:
def str_class_to_int(class_array):
    class_array[class_array == 'cemetery'] = 0
    class_array[class_array == 'funeral_hall'] = 1
    class_array[class_array == 'grass'] = 2
    class_array[class_array == 'memorial'] = 3
    class_array[class_array == 'park'] = 4
    class_array[class_array == 'playground'] = 5
    class_array[class_array == 'recreation_ground'] = 6
    class_array[class_array == 'building'] = 7
    return(class_array.astype(int))

GaussianNB Naive Bayes¶

In [ ]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(X, y)
Out[ ]:
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
In [ ]:
from rasterio.plot import show
from rasterio.plot import show_hist
from rasterio.windows import Window
from rasterio.plot import reshape_as_raster, reshape_as_image
In [ ]:
with rasterio.open(img_fp) as src:
    # may need to reduce this image size if your kernel crashes, takes a lot of memory
    img = src.read()
# Take our full image and reshape into long 2d array (nrow * ncol, nband) for classification
print(img.shape)
reshaped_img = reshape_as_image(img)
print(reshaped_img.shape)
(3, 9529, 9529)
(9529, 9529, 3)
In [ ]:
class_prediction = gnb.predict(reshaped_img.reshape(-1, 3))

# Reshape our classification map back into a 2D matrix so we can visualize it
class_prediction = class_prediction.reshape(reshaped_img[:, :, 0].shape)
In [ ]:
class_prediction = str_class_to_int(class_prediction)
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (105, 105, 105, 255)),    # cemetery - dark grey
    (1, (230, 230, 250, 255)),    # funeral hall - pale lavender
    (2, (144, 238, 144, 255)),    # grass - light green
    (3, (70, 130, 180, 255)),     # memorial - deep blue
    (4, (60, 179, 113, 255)),     # park - fresh green
    (5, (255, 255, 0, 255)),      # playground - bright yellow
    (6, (255, 165, 0, 255)),      # recreation ground - bright orange
    (7, (255, 0, 0, 255)),        # buildings - red
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap

# Define the class labels and their corresponding colors
class_labels = {
    0: "cemetery",
    1: "funeral hall",
    2: "grass",
    3: "memorial",
    4: "park",
    5: "playground",
    6: "recreation ground",
    7: "buildings"
}

patches = [mpatches.Patch(color=colors[key], label=class_labels[key]) for key in class_labels]

fig, axs = plt.subplots(2,1,figsize=(20,15))

img_stretched = reshaped_img
axs[0].imshow(img_stretched)

axs[1].imshow(class_prediction, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

fig.show()
C:\Users\leoni\AppData\Local\Temp\ipykernel_4036\3190710703.py:26: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
  fig.show()
No description has been provided for this image

Random Forest¶

In [ ]:
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import rasterio
import matplotlib.pyplot as plt

# Create the Random Forest model
rf = RandomForestClassifier()

# Train the model
rf.fit(X, y)
Out[ ]:
RandomForestClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
In [ ]:
with rasterio.open(img_fp) as src:
    img = src.read()
reshaped_img = reshape_as_image(img)

# Predict the classes using the Random Forest model
class_prediction = rf.predict(reshaped_img.reshape(-1, 3))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction) 
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (105, 105, 105, 255)),    # cemetery - dark grey
    (1, (230, 230, 250, 255)),    # funeral hall - pale lavender
    (2, (144, 238, 144, 255)),    # grass - light green
    (3, (70, 130, 180, 255)),     # memorial - deep blue
    (4, (60, 179, 113, 255)),     # park - fresh green
    (5, (255, 255, 0, 255)),      # playground - bright yellow
    (6, (255, 165, 0, 255)),      # recreation ground - bright orange
    (7, (255, 0, 0, 255)),        # buildings - red
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap

# Assuming reshaped_img is in the correct format
expected_shape = reshaped_img[:, :, 0].shape
class_prediction_2d = class_prediction.reshape(expected_shape)

fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = reshaped_img
axs[0].imshow(img_stretched)

# Use the reshaped 2D array for visualization
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image

KMeans¶

In [ ]:
from sklearn.cluster import KMeans

bands, rows, cols = img.shape

k = 7 # num of clusters

kmeans_predictions = KMeans(n_clusters=k, random_state=0).fit(reshaped_img.reshape(-1, 8))

kmeans_predictions_2d = kmeans_predictions.labels_.reshape(rows, cols)

# Now show the classmap next to the image
fig, axs = plt.subplots(1,2,figsize=(15,8))

img_stretched = color_stretch(reshaped_img, [7, 3, 2]) #try different band combinations
axs[0].imshow(img_stretched)

axs[1].imshow(kmeans_predictions_2d)
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
Out[ ]:
<matplotlib.image.AxesImage at 0x1f00e917dd0>
No description has been provided for this image

KNeighbours¶

In [ ]:
from sklearn.neighbors import KNeighborsClassifier

# Create the KNN model
knn = KNeighborsClassifier()

# Train the model
knn.fit(X, y)
Out[ ]:
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsClassifier()
In [ ]:
with rasterio.open(img_fp) as src:
    img = src.read()
reshaped_img = reshape_as_image(img)

# Predict the classes using the KNN model
class_prediction = knn.predict(reshaped_img.reshape(-1, 3))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (105, 105, 105, 255)),    # cemetery - dark grey
    (1, (230, 230, 250, 255)),    # funeral hall - pale lavender
    (2, (144, 238, 144, 255)),    # grass - light green
    (3, (70, 130, 180, 255)),     # memorial - deep blue
    (4, (60, 179, 113, 255)),     # park - fresh green
    (5, (255, 255, 0, 255)),      # playground - bright yellow
    (6, (255, 165, 0, 255)),      # recreation ground - bright orange
    (7, (255, 0, 0, 255)),        # buildings - red
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = reshaped_img
axs[0].imshow(img_stretched)

class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image

Decision Tree¶

In [ ]:
from sklearn.tree import DecisionTreeClassifier

# Create the Decision Tree model
dt = DecisionTreeClassifier()

# Train the model
dt.fit(X, y)
Out[ ]:
DecisionTreeClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [ ]:
# Read and reshape the image
with rasterio.open(img_fp) as src:
    img = src.read()
reshaped_img = reshape_as_image(img)

# Predict the classes using the Decision Tree model
class_prediction = dt.predict(reshaped_img.reshape(-1, 3))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = reshaped_img
axs[0].imshow(img_stretched)

class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image
In [ ]: